Graph 1: shows that, across all power station owners, Los Angeles is the city with the highest electricity demand.
Graph 2: shows the relationship between electricity consumption and housing prices for the top 10 cities in CA.
Graph 3: shows that electricity consumption in LA grew year over year before 2010 and has been stable since 2010.
Graph 4: shows the LA housing price distribution together with local median income and population.
Graph 5: shows the distribution of housing prices by ocean proximity in LA.
Graph 6: shows a word cloud generated from related news and articles.
Dataset & Reference:
County LA
1 2019 19562.55
2 2018 20516.12
3 2017 20663.27
4 2016 20287.96
5 2015 20432.53
6 2014 20742.77
7 2013 20611.29
8 2012 21076.22
9 2011 20064.62
10 2010 19721.28
11 2009 20590.38
12 2008 21115.74
13 2007 20536.16
14 2006 20377.01
15 2005 19711.01
16 2004 19507.01
17 2003 19056.04
18 2002 17917.34
19 2001 18212.58
20 2000 18891.64
21 1999 17665.12
22 1998 17234.63
23 1997 17578.66
24 1996 16322.05
25 1995 16823.63
26 1994 16065.58
27 1993 15759.08
28 1992 16377.79
29 1991 16310.88
30 1990 16960.72
31 Total Usage 566691.67
---
title: "Explore Housing and Electricity Relationship in LA"
output:
  flexdashboard::flex_dashboard:
    storyboard: true
    social: menu
    source: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
```
```{r}
library(tidyverse)
library(ggplot2)
library(scales)
library(viridis)
library(igraph)
library(plotly)
library(sp)
library(maps)
library(maptools)
library(wordcloud2)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
# Mapbox access token, exposed to the session through the MAPBOX_TOKEN
# environment variable. A token that is already set in the environment takes
# precedence; the literal below is only a fallback, and it is written as a
# string instead of relying on numeric-to-character coercion.
# NOTE(review): this dashboard is rendered with `source: embed`, so any value
# written here is published with the output -- prefer supplying the token
# from outside the file.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv(MAPBOX_TOKEN = "11122223333444")
}
```
```{r}
# Build the owner -> county edge list that the Graph1 chunk plots.
# Result: `graphdf`, one row per (Owner, County) pair with a `total_weight`
# column, returned ungrouped.
electricity <- read.csv("Electricity.csv")

# Counties kept in the network.
top_counties <- c(
  "Los Angeles", "San Diego", "Orange", "Riverside", "San Bernardino",
  "Santa Clara", "Sacramento", "San Francisco", "Kern", "Fresno"
)

# `weight` is the ordinal position of a row's Highest_kV among all distinct
# Highest_kV values in the file, so it has to be computed before the county
# filter narrows the rows.
groupbys <- electricity %>%
  select(Owner, County, Highest_kV) %>%
  drop_na() %>%
  mutate(weight = as.numeric(factor(Highest_kV))) %>%
  select(Owner, County, weight) %>%
  filter(County %in% top_counties)

# Sum the weights per pair. `.groups = "drop"` hands back a plain tibble
# instead of one that is still grouped by Owner.
graphdf <- groupbys %>%
  group_by(Owner, County) %>%
  summarize(total_weight = sum(weight), .groups = "drop") %>%
  drop_na()
```
### ABOUT
Graph 1: shows that, across all power station owners, Los Angeles is the city with the highest electricity demand.
Graph 2: shows the relationship between electricity consumption and housing prices for the top 10 cities in CA.
Graph 3: shows that electricity consumption in LA grew year over year before 2010 and has been stable since 2010.
Graph 4: shows the LA housing price distribution together with local median income and population.
Graph 5: shows the distribution of housing prices by ocean proximity in LA.
Graph 6: shows a word cloud generated from related news and articles.
Dataset & Reference:
1. https://www.kaggle.com/camnugent/california-housing-prices
2. http://www.ecdms.energy.ca.gov/elecbycounty.aspx
3. https://www.noradarealestate.com/blog/los-angeles-real-estate-market/
4. https://cecgis-caenergy.opendata.arcgis.com/datasets/7f37f2535d3144e898a53b9385737ee0_0/data?geometry=-160.990%2C31.125%2C-77.362%2C43.323
### Graph1: The network relationship between power station owners and the top 10 CA counties
```{r}
# Directed owner -> county network, drawn on a circle.
graphdf1 <- graph_from_data_frame(graphdf, directed = TRUE)

# Colour vertices by role instead of by hard-coded position: every vertex
# whose name occurs in the County column takes the next palette colour (in
# vertex order), everything else (the owners) is gray. This no longer assumes
# a fixed number of owners ahead of the counties in the vertex sequence.
# A name that occurs both as an owner and as a county is coloured as a county.
county_palette <- c(
  "chocolate", "chocolate1", "chocolate2", "chocolate3", "coral",
  "coral1", "coral2", "coral3", "burlywood1", "burlywood2"
)
is_county <- V(graphdf1)$name %in% graphdf$County
vertex_colours <- rep("gray", vcount(graphdf1))
vertex_colours[is_county] <- rep_len(county_palette, sum(is_county))
V(graphdf1)$colour <- vertex_colours

# Edge width: standardised total weight, stretched and shifted so that an
# average edge has width 3. as.numeric() drops the matrix wrapper that
# scale() returns.
# NOTE(review): an edge more than 1.5 standard deviations below the mean gets
# a negative width -- confirm that cannot happen with the real data.
E(graphdf1)$weight <- as.numeric(2 * scale(graphdf$total_weight) + 3)

par(mar = c(0.3, 0.3, 1, 0.3))
plot(
  graphdf1,
  layout = layout_in_circle,
  edge.arrow.size = 0.3,
  vertex.size = 20,
  edge.width = E(graphdf1)$weight,
  vertex.color = adjustcolor(V(graphdf1)$colour, alpha.f = .8)
)
```
```{r}
# Load the consumption table; the Graph3 chunk reads its `County` and `LA`
# columns and a later chunk prints the whole table.
consumption <- read.csv("Consumption.csv")
```
### Graph2: Electricity consumption and average housing prices in the top 10 CA counties
```{r}
# Load the housing table that the county lookup and the price summary use.
housing <- read_csv(file = "housing.csv")
# Look up the county polygon that contains each point.
#
# pointsDF: data frame whose first two columns are handed to SpatialPoints()
#           as longitude/latitude coordinates (WGS84).
# Returns one polygon ID per row of pointsDF; IDs are the names from
# map("county") with anything after a ":" removed (callers compare them
# against strings such as "california,los angeles").
latlong2county <- function(pointsDF) {
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # One SpatialPolygon per county, keyed by the cleaned-up map name.
  county_map <- map("county", fill = TRUE, col = "transparent", plot = FALSE)
  county_ids <- vapply(
    strsplit(county_map$names, ":"),
    function(parts) parts[1],
    character(1)
  )
  county_polys <- map2SpatialPolygons(
    county_map,
    IDs = county_ids,
    proj4string = wgs84
  )

  # Points in the same coordinate reference system as the polygons.
  point_layer <- SpatialPoints(pointsDF[1:2], proj4string = wgs84)

  # `over` yields, per point, the index of the polygon it falls in.
  hit <- over(point_layer, county_polys)

  # Translate polygon indices back to polygon IDs.
  poly_names <- vapply(
    county_polys@polygons,
    function(poly) poly@ID,
    character(1)
  )
  poly_names[hit]
}
# Sample points (Wisconsin and Oregon) kept from the helper's example usage;
# nothing in this chunk reads them.
testPoints <- data.frame(x = c(-90, -120), y = c(44, 44))

# Tag every housing row with the county its coordinates fall in.
housing$county <- latlong2county(housing)

# The ten counties compared in Graph2, in lower case. The housing side uses
# "california,<name>" keys, the consumption file uses upper-case names.
top10_counties <- c(
  "los angeles", "san diego", "riverside", "kern", "san bernardino",
  "fresno", "orange", "sacramento", "san francisco", "santa clara"
)

# Average median house value per county.
top10 <- housing %>%
  filter(county %in% paste0("california,", top10_counties)) %>%
  group_by(county) %>%
  summarize(avg_price = mean(median_house_value)) %>%
  drop_na()

# Electricity consumption (column X2000) for the same counties.
consumption_county <- read.csv("ElectricityByCounty.csv")
temp <- consumption_county %>%
  filter(county %in% toupper(top10_counties)) %>%
  select(county, X2000)

# Attach consumption by county key rather than by row position: `top10` is
# ordered by county name while `temp` keeps the file's row order, so a
# positional copy only lines up when the file happens to be sorted the same
# way. Counties missing from the file get NA instead of a shifted value.
consumption_key <- paste0("california,", tolower(trimws(temp$county)))
top10$consumption <- temp$X2000[match(top10$county, consumption_key)]
```
```{r}
# Bars: electricity consumption per county on the left axis.
fig <- plot_ly(
  top10,
  x = ~county,
  y = ~consumption,
  type = "bar",
  name = "Electricity Consumption"
)

# Line: average housing price, drawn against a second y axis.
fig <- add_lines(
  fig,
  x = ~county,
  y = ~avg_price,
  yaxis = "y2",
  name = "Average Housing Price"
)

# Overlay the second axis on the first and place it on the right-hand side.
fig <- plotly::layout(fig, yaxis2 = list(overlaying = "y", side = "right"))
fig
```
### Graph3: Electricity Consumption in LA over Time
```{r}
# Scatter plot of LA electricity consumption per year.
#
# Only rows whose `County` entry is a four-digit year are plotted; entries
# that are not a year (such as a "Total Usage" summary row) are left out
# instead of turning into NA dates.
year_label <- trimws(as.character(consumption$County))
is_year <- grepl("^[0-9]{4}$", year_label)

# Pin each observation to 1 January of its year. as.Date(x, format = "%Y")
# would fill the missing month and day with today's, so the x positions
# would change with the date the dashboard is rendered.
year_start <- as.Date(paste0(year_label[is_year], "-01-01"))

fig <- plot_ly(
  type = "scatter",
  x = year_start,
  y = consumption$LA[is_year],
  name = "Electricity Consumption in LA",
  mode = "markers"
)
fig <- fig %>%
  layout(
    title = "Electricity Consumption in LA"
  )
fig
```
***
```{r}
# Auto-print the full consumption table in the section that follows the
# `***` separator of the Graph3 frame.
consumption
```
```{r}
# Read the housing table again from disk before narrowing it to Los Angeles.
housing <- read_csv(file = "housing.csv")
# Look up the county polygon that contains each point.
#
# pointsDF: data frame whose first two columns are handed to SpatialPoints()
#           as longitude/latitude coordinates (WGS84).
# Returns one polygon ID per row of pointsDF; IDs are the names from
# map("county") with anything after a ":" removed (callers compare them
# against strings such as "california,los angeles").
latlong2county <- function(pointsDF) {
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # One SpatialPolygon per county, keyed by the cleaned-up map name.
  county_map <- map("county", fill = TRUE, col = "transparent", plot = FALSE)
  county_ids <- vapply(
    strsplit(county_map$names, ":"),
    function(parts) parts[1],
    character(1)
  )
  county_polys <- map2SpatialPolygons(
    county_map,
    IDs = county_ids,
    proj4string = wgs84
  )

  # Points in the same coordinate reference system as the polygons.
  point_layer <- SpatialPoints(pointsDF[1:2], proj4string = wgs84)

  # `over` yields, per point, the index of the polygon it falls in.
  hit <- over(point_layer, county_polys)

  # Translate polygon indices back to polygon IDs.
  poly_names <- vapply(
    county_polys@polygons,
    function(poly) poly@ID,
    character(1)
  )
  poly_names[hit]
}
# Sample points (Wisconsin and Oregon) kept from the helper's example usage;
# nothing in this chunk reads them.
testPoints <- data.frame(x = c(-90, -120), y = c(44, 44))

# Tag every row with its county and keep only Los Angeles County.
housing$county <- latlong2county(housing)
housing <- housing %>%
  filter(county %in% c("california,los angeles"))

# Price band per row. The bands are closed on the right (a value of exactly
# 100000 is "<100K", exactly 200000 is "100K~200K", ...) and a missing
# median_house_value stays NA. Building the column in one cut() call avoids
# subset-assigning into a column that does not exist yet, which makes a
# tibble warn about an unknown or uninitialised column. The result is kept
# as character so downstream plots treat it as before.
housing$Category <- as.character(cut(
  housing$median_house_value,
  breaks = c(-Inf, 100000, 200000, 300000, 400000, Inf),
  labels = c("<100K", "100K~200K", "200K~300K", "300K~400K", ">400K"),
  right = TRUE
))
```
### Graph4: Los Angeles Housing Price Distribution with Population
```{r}
# Turn warnings off for the rest of the session.
# NOTE(review): this is a global switch and is never restored, so it also
# hides warnings raised by every later chunk.
options(warn = -1)

# Map of Los Angeles housing blocks: position = longitude/latitude,
# colour = median house value, point size = population. The extra
# aesthetics (hma, tr, tb, hh, mi) are mapped but not drawn by any layer.
plot_map <- ggplot(
  housing,
  aes(
    x = longitude, y = latitude, color = median_house_value,
    hma = housing_median_age, tr = total_rooms, tb = total_bedrooms,
    hh = households, mi = median_income
  )
) +
  geom_point(aes(size = population), alpha = 0.6) +
  scale_color_viridis(option = "D", labels = comma) +
  labs(
    title = "Los Angeles Housing Price Distribution",
    x = "Longitude",
    y = "Latitude",
    color = "Median House Value (in $USD)",
    size = "Population"
  ) +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
plot_map
```
### Graph5: The distribution of housing prices by ocean proximity in LA
```{r}
# Histogram of LA housing rows per ocean-proximity class, with one trace per
# price band (`Category`). The chart title's spelling is corrected
# ("amoung" -> "among").
fig <- plot_ly(housing, x = ~ocean_proximity, color = ~Category) %>%
  add_histogram() %>%
  layout(
    title = "The distribution of housing price among ocean proximity in LA"
  )
fig
```
### Graph6: Word Cloud of LA Housing and Electricity Articles
```{r}
# Read the article text (one element per line) and build a cleaned corpus
# for the word cloud.
text <- readLines("LA_housing_overview.txt")

# Drop every character that cannot be represented in ASCII. iconv() is
# vectorised, so the whole vector is converted in one call; this also keeps
# `text` a plain character vector (even for an empty file) instead of
# attaching each full line to its element as a name.
text <- iconv(text, from = "latin1", to = "ASCII", sub = "")

corpus <- VCorpus(VectorSource(text))
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeWords, stopwords())
# Stemming is intentionally left disabled:
# corpus <- tm_map(corpus, stemDocument)
corpus <- tm_map(corpus, stripWhitespace)
```
```{r}
# Term counts across the corpus, after dropping terms at the 0.9999
# sparsity threshold.
dtm <- DocumentTermMatrix(corpus)
dtm <- removeSparseTerms(dtm, sparse = 0.9999)
dataset <- as.matrix(dtm)

# Total frequency per term, most frequent first.
v <- sort(colSums(dataset), decreasing = TRUE)
myNames <- names(v)
d <- data.frame(word = myNames, freq = v)

# Draw the cloud with the most frequent words placed first and colours
# taken from the Dark2 palette by frequency.
pal2 <- brewer.pal(n = 8, name = "Dark2")
wordcloud(
  words = d$word,
  freq = d$freq,
  scale = c(4, .6),
  min.freq = 15,
  max.words = 200,
  random.order = FALSE,
  random.color = FALSE,
  colors = pal2
)
```